In [1]:
import import_ipynb
import warnings
warnings.filterwarnings('ignore')
from preprocessing import load_data
from model.lgb_model import lgb_model_init, lgb_model_tuned
from model.xgb_model import xgb_model_init, xgb_model_tuned
from model.cat_model import cat_model_init, cat_model_tuned
from model.dnn_model import dnn_epoch_50, dnn_epoch_100
from model.rf_model import rf_model_init,rf_model_tuned
from model.knn_model import knn_model_init,knn_model_tuned

import pandas as pd
import sklearn.metrics as mt
import matplotlib.pyplot as plt
import numpy as np


import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, InputLayer
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
importing Jupyter notebook from preprocessing.ipynb
importing Jupyter notebook from C:\Users\ShinTH\0_knn.ipynb_220604\Untitled Folder 3\src\model\lgb_model.ipynb
importing Jupyter notebook from C:\Users\ShinTH\0_knn.ipynb_220604\Untitled Folder 3\src\model\xgb_model.ipynb
importing Jupyter notebook from C:\Users\ShinTH\0_knn.ipynb_220604\Untitled Folder 3\src\model\cat_model.ipynb
importing Jupyter notebook from C:\Users\ShinTH\0_knn.ipynb_220604\Untitled Folder 3\src\model\dnn_model.ipynb
importing Jupyter notebook from C:\Users\ShinTH\0_knn.ipynb_220604\Untitled Folder 3\src\model\rf_model.ipynb
importing Jupyter notebook from C:\Users\ShinTH\0_knn.ipynb_220604\Untitled Folder 3\src\model\knn_model.ipynb
In [2]:
# Load and preprocess the loan-default dataset via the project helper.
# NOTE: this runs IterativeImputer over a (143983, 63) matrix (see log
# output below) and takes several minutes — consider caching the result
# (e.g. to parquet) so Restart-&-Run-All stays feasible.
data,target = load_data('data/Loan_Default.csv')
[IterativeImputer] Completing matrix with shape (143983, 63)
[IterativeImputer] Ending imputation round 1/20, elapsed time 22.61
[IterativeImputer] Change: 4339717.718181094, scaled tolerance: 16508.0 
[IterativeImputer] Ending imputation round 2/20, elapsed time 79.09
[IterativeImputer] Change: 109178.47470880038, scaled tolerance: 16508.0 
[IterativeImputer] Ending imputation round 3/20, elapsed time 129.89
[IterativeImputer] Change: 61326.23123346872, scaled tolerance: 16508.0 
[IterativeImputer] Ending imputation round 4/20, elapsed time 187.54
[IterativeImputer] Change: 47768.54785551413, scaled tolerance: 16508.0 
[IterativeImputer] Ending imputation round 5/20, elapsed time 247.02
[IterativeImputer] Change: 34219.36235358618, scaled tolerance: 16508.0 
[IterativeImputer] Ending imputation round 6/20, elapsed time 303.15
[IterativeImputer] Change: 22454.16177676722, scaled tolerance: 16508.0 
[IterativeImputer] Ending imputation round 7/20, elapsed time 362.25
[IterativeImputer] Change: 13891.02646774809, scaled tolerance: 16508.0 
[IterativeImputer] Early stopping criterion reached.
In [3]:
# 70/30 hold-out split with a fixed seed for reproducibility.
# NOTE(review): the downstream models are all classifiers — consider
# stratify=target to preserve class balance in both splits; confirm the
# class distribution before changing.
X_train,X_test,y_train,y_test = train_test_split(data,target,train_size=0.7,random_state=2022)
In [5]:
# Fit the hyperparameter-tuned CatBoost model (project helper; per the log
# below it grid-searches 27 candidates with 5-fold CV — slow).
cat_model = cat_model_tuned(X_train,y_train)
Fitting 5 folds for each of 27 candidates, totalling 135 fits
In [6]:
# Fit the hyperparameter-tuned XGBoost model (project helper; per the log
# below it grid-searches 36 candidates with 5-fold CV).
xgb_model = xgb_model_tuned(X_train,y_train)
Fitting 5 folds for each of 36 candidates, totalling 180 fits
In [7]:
# Fit the hyperparameter-tuned LightGBM model (project helper; per the log
# below it grid-searches 54 candidates with 5-fold CV).
lgb_model = lgb_model_tuned(X_train,y_train)
Fitting 5 folds for each of 54 candidates, totalling 270 fits

모델별 차이를 보여주는 X_test[11] LIME XAI¶

In [9]:
from lime import lime_tabular

# LIME surrogate explanation of the CatBoost prediction for test row 11.
# (Removed the duplicate `import numpy as np` — numpy is already imported
# as np in the imports cell at the top of the notebook.)
cat_lime_explainer = lime_tabular.LimeTabularExplainer(
    np.array(X_train),
    feature_names=data.columns,
    mode="classification",
)
cat_lime_explanation = cat_lime_explainer.explain_instance(
    X_test.iloc[11], cat_model.predict_proba
)
cat_lime_explanation.show_in_notebook(show_table=True)
In [10]:
# Same LIME setup for the tuned LightGBM model, explaining the same test
# row (11) so the three model explanations can be compared side by side.
lgb_lime_explainer = lime_tabular.LimeTabularExplainer(
    np.array(X_train),
    feature_names=data.columns,
    mode="classification",
)
lgb_lime_explanation = lgb_lime_explainer.explain_instance(
    X_test.iloc[11], lgb_model.predict_proba
)
lgb_lime_explanation.show_in_notebook(show_table=True)
In [11]:
# LIME explanation for the tuned XGBoost model on test row 11.
xgb_lime_explainer = lime_tabular.LimeTabularExplainer(
    np.array(X_train),
    feature_names=data.columns,
    mode="classification",
)
# BUG FIX: the original called lgb_lime_explainer.explain_instance here,
# so the "XGBoost" explanation was generated by the LightGBM explainer
# object. Use the XGBoost explainer constructed above.
xgb_lime_explanation = xgb_lime_explainer.explain_instance(
    X_test.iloc[11], xgb_model.predict_proba
)
xgb_lime_explanation.show_in_notebook(show_table=True)

모델별 X_test[11] SHAP force_plot¶

In [13]:
import shap 
# TreeExplainer computes SHAP values for tree-ensemble models.
# Values are computed over the full test set for the summary plots below.
cat_shap_explainer = shap.TreeExplainer(cat_model)
cat_shap_values = cat_shap_explainer.shap_values(X_test)
In [14]:
# SHAP values for the tuned XGBoost model over the test set.
xgb_shap_explainer = shap.TreeExplainer(xgb_model)
xgb_shap_values = xgb_shap_explainer.shap_values(X_test)
In [15]:
# SHAP values for the tuned LightGBM model over the test set.
# NOTE: for a binary LightGBM classifier this returns a list of two arrays
# (one per class) — the next cell selects the positive class.
lgb_shap_explainer = shap.TreeExplainer(lgb_model)
lgb_shap_values = lgb_shap_explainer.shap_values(X_test)
In [17]:
# For a binary LightGBM model shap_values() returns a per-class list;
# keep only the positive-class (index 1) contribution array.
# Guarded with isinstance so re-running this cell is idempotent — the
# original bare `lgb_shap_values = lgb_shap_values[1]` stripped another
# level on every re-execution, silently corrupting later plots.
if isinstance(lgb_shap_values, list):
    lgb_shap_values = lgb_shap_values[1]
In [22]:
# Load the SHAP JavaScript library so force plots render in this session.
shap.initjs()
# Per-feature contribution (force plot) for the CatBoost prediction on row 11.
shap.force_plot(cat_shap_explainer.expected_value, cat_shap_values[11, :], X_test.iloc[11, :])
Out[22]:
Visualization omitted, Javascript library not loaded!
Have you run `initjs()` in this notebook? If this notebook was from another user you must also trust this notebook (File -> Trust notebook). If you are viewing this notebook on github the Javascript has been stripped for security. If you are using JupyterLab this error is because a JupyterLab extension has not yet been written.
In [23]:
# XGBoost force plot for the same test row, for side-by-side comparison.
shap.force_plot(xgb_shap_explainer.expected_value, xgb_shap_values[11, :], X_test.iloc[11, :])
Out[23]:
Visualization omitted, Javascript library not loaded!
Have you run `initjs()` in this notebook? If this notebook was from another user you must also trust this notebook (File -> Trust notebook). If you are viewing this notebook on github the Javascript has been stripped for security. If you are using JupyterLab this error is because a JupyterLab extension has not yet been written.
In [28]:
# LightGBM exposes per-class expected values; index 1 matches the
# positive-class shap values selected earlier (lgb_shap_values[1]).
shap.force_plot(lgb_shap_explainer.expected_value[1], lgb_shap_values[11, :], X_test.iloc[11, :])
Out[28]:
Visualization omitted, Javascript library not loaded!
Have you run `initjs()` in this notebook? If this notebook was from another user you must also trust this notebook (File -> Trust notebook). If you are viewing this notebook on github the Javascript has been stripped for security. If you are using JupyterLab this error is because a JupyterLab extension has not yet been written.

모델별 summary_plot¶

In [25]:
# Global feature-importance summary (beeswarm) for CatBoost over the test set.
shap.summary_plot(cat_shap_values, X_test)
In [26]:
# Global feature-importance summary for XGBoost over the test set.
shap.summary_plot(xgb_shap_values, X_test)
In [27]:
# Global feature-importance summary for LightGBM (positive class) over the test set.
shap.summary_plot(lgb_shap_values, X_test)
In [ ]: